## Header
# visualize microsat data 2016-2020 quarantine animals
# last updated 2-1-2022


## prep workspace
  rm(list=ls())
  sapply(c("adegenet","pegas","hierfstat","RColorBrewer","related","poppr","mmod"),require,character.only=TRUE)
  cur.dir=("Q:\\new O drive\\population conservation\\genetics\\population structure")
  setwd(cur.dir)
  rm(cur.dir)
  

  
  
## Run DAPC
  
  #read in tabular genotype data and convert to genid object
    d=read.table("data files\\tabular format.txt",colClasses = "character",header=TRUE)
    d$Year=as.numeric(d$Year)

  #clean to radiocollar data  
    d=d[grep("Y", d$ID), ]
    d=d[order(d$ID),]
    nrow(d)
  
  #create genid object
    data=df2genind(X=d[,c(9:ncol(d))],
                 sep="/",
                 ind.names=d$ID,
                 ploidy = 2,
                 NA.char="000",
                 pop=rep("YELL",nrow(d))) 

  #filter missing data
    propTyped(data,by="ind") #remove only if >1 -->no missing data

  #Basic Stats
    b=basic.stats(data, diploid = TRUE)
    smry=summary(data)

    #Hardy-Weinberg
      barplot(smry$Hexp-smry$Hobs, main="Heterozygosity: expected-observed",ylab="Hexp - Hobs")
      mean(smry$Hobs); sd(smry$Hobs); min(smry$Hobs); max(smry$Hobs)
      mean(smry$Hexp); sd(smry$Hexp); min(smry$Hexp); max(smry$Hexp)
      bartlett.test(list(smry$Hexp,smry$Hobs)) #tst of variance
      t.test(smry$Hexp,smry$Hobs,pair=T,var.equal=TRUE,alter="greater") #test of means
      hwt=hw.test(data, B=0)#hwt test per loci
        hwt[which(hwt[,3]<0.05),]#report loci that failed H-W-E
    
    #allelic richness
      barplot(smry$loc.n.all, ylab="Number of alleles",main="Number of alleles per locus")
      ar=allelic.richness(data,min.n=NULL,diploid=TRUE)
          mean(ar$Ar[,1]);sd(ar$Ar[,1])
   
    # Inbreeding coefficeint
      mean(b$Fis[,1]); sd(b$Fis[,1])
  
    # Find genetic clusters
      find.clusters(data, max.n.clust=10) #tool to explore clustering 
      
      #make from 2 to 5 groups (with 6+ groups clusters overlaps substantially)
      grp.2=find.clusters(data, n.pca=50,n.clust=2) 
      grp.3=find.clusters(data, n.pca=30,n.clust=3)
      grp.4=find.clusters(data, n.pca=30,n.clust=4)
      grp.5=find.clusters(data, n.pca=30,n.clust=5)

    # DAPC
      #plot function
        myInset <- function(dapc){
          temp <- 100* cumsum(dapc$pca.eig)/sum(dapc$pca.eig)
          plot(temp, col=rep(c("black","lightgrey"),c(dapc$n.pca,1000)), ylim=c(0,100),
             xlab="PCA axis", ylab="Variance (%)",
             cex=.8, pch=20, type="h", lwd=2)
          }
      
      dapc2=dapc(data, grp.2$grp, n.pca=50, n.da=1)
        scatter(dapc2, bg="white",col=brewer.pal(8,"Set1"),legend=TRUE)
        assignplot(dapc2)
        compoplot(dapc2, posi="bottomright",
                  txt.leg=paste("Cluster", 1:2), lab="",
                  ncol=1, xlab="individuals")
        
      dapc3=dapc(data, grp.3$grp, n.pca=30, n.da=2)
        scatter(dapc3, bg="white", col=brewer.pal(8,"Set1"),scree.da=FALSE,scree.pca = FALSE)
        add.scatter(myInset(dapc3), posi="bottomleft",inset=c(-0.01,-0.01), ratio=.2,
                    bg=transp("white"))
        assignplot(dapc3)
        compoplot(dapc3, posi="bottomright",
                  txt.leg=paste("Cluster", 1:3), lab="",
                  ncol=1, xlab="individuals")
        
      dapc4=dapc(data, grp.4$grp, n.pca=30, n.da=3)
        scatter(dapc4, bg="white", col=brewer.pal(8,"Set1"),scree.da=FALSE,scree.pca = FALSE)
        add.scatter(myInset(dapc4), posi="topleft",inset=c(0.01,-0.15), ratio=.2,
                  bg=transp("white"))
        assignplot(dapc4)
        compoplot(dapc4, posi="bottomright",
                  txt.leg=paste("Cluster", 1:4), lab="",
                  ncol=1, xlab="individuals")
        
      dapc5=dapc(data, grp.5$grp, n.pca=30, n.da=4)
        scatter(dapc5, bg="white", col=brewer.pal(8,"Set1"),scree.da=FALSE,scree.pca = FALSE)
        add.scatter(myInset(dapc5), posi="bottomright",inset=c(-0.01,-0.01), ratio=.2,
                  bg=transp("white"))
        assignplot(dapc5)
        compoplot(dapc5, posi="bottomright",
                  txt.leg=paste("Cluster", 1:4), lab="",
                  ncol=1, xlab="individuals")
        
    # genetic distance
      #check for two groups
        data=df2genind(X=d[,c(9:ncol(d))],
                       sep="/",
                       ind.names=d$ID,
                       ploidy = 2,
                       NA.char="000",
                       pop=grp.2$grp)
        Gst_Hedrick(data)
        
      #estimate distance matrix
        micsatdist=provesti.dist(data)
          write.csv(as.matrix(micsatdist),"data files\\microsat distance matrix ProvestiIndex radiocollars.csv")
        datasub=data[sample(nInd(data), 30)]
        aboot(datasub, dist = provesti.dist, sample = 200, tree = "nj", cutoff = 50, quiet = TRUE)
      
      
      
      
## Estimate pairwise relatedness
  data=readgenotypedata ("data files\\collardata_relatedformat.txt")
  
  #estimate relatedness for 6 indexes
    rvals=coancestry(data$gdata,
                   trioml =1,
                   lynchli =1,
                   lynchrd =1,
                   quellergt =1,
                   ritland =1,
                   wang =1,
                   dyadml =1)
    write.csv(rvals$relatedness,"data files\\relatedness indices radiocollared bison.csv",row.names = FALSE)
  
  #simulate datasets and check estimator accuracy
    compareestimators(data, 25)
      
      #simualte 25 each of unrelated, half-sib, fullsib, parent-offspring pairs
      urval <- rep (0 , 25)
      hsval <- rep (0.25 , 25)
      fsval <- rep (0.5 , 25)
      poval <- rep (0.5 , 25)
      relvals <- c(poval, fsval, hsval, urval)
      sim <- familysim ( data$freqs , 25)
      
      #estimate relatedness vals
      output <- coancestry ( sim , 
                             trioml =1,
                             lynchli =1,
                             lynchrd =1,
                             quellergt =1,
                             ritland =1,
                             wang =1,
                             dyadml =1)
      #remove redundant pairs
      simrel <- cleanuprvals (output$relatedness , 25) 
      
      #Corr coef of simulated and estimated vals
      cor ( relvals , simrel$trioml) 
      cor ( relvals , simrel$wang) 
      cor ( relvals , simrel$lynchli)
      cor ( relvals , simrel$lynchrd) 
      cor ( relvals , simrel$ritland) 
      cor ( relvals , simrel$quellergt)
      cor ( relvals , simrel$dyadml) 
 

    
 
  